**Reset the session before building the panel: empty the data in memory,
**turn off output paging, clear global macros, and close any open log.
clear
set more off
**The keyword _all is required here. Without the underscore, macro drop
**only targets a global literally named all and leaves every other global
**from earlier work in place.
macro drop _all
capture log close

/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection
Panel Analysis of the Effect of FC Placement on Child Outcomes

Created on: 2/26/19

Last Modified on: 2/20/2024

Description: Construct main panel for analysis 

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

** Directory globals. The paths are intentionally blank in this copy of the
** program; fill them in before running.
**   rawdata   : not referenced in the visible part of this script
**   cleandata : location of the input panels and of the saved analysis panel
**   tmp       : not referenced in the visible part of this script
**   jj        : location of the juvenile justice linkage and filing files
global rawdata   ""
global cleandata ""
global tmp       ""
global jj        ""

**************************
**(0) SET UP
**************************

**Load the master child*investigation level data
use "$cleandata/sample_panel.dta", clear

**Control variables
**pre_abuse equals 1 when phyab is 1 and 0 otherwise (a missing phyab gives 0)
gen pre_abuse=phyab==1

**For every baseline control, build a companion flag miss_X marking rows where
**the control is missing, then recode those rows to 0 so the control and its
**flag can be used together.
**missing() is used rather than a comparison with the system missing value so
**that extended missing codes (.a through .z) are flagged and zero-filled too;
**a plain ==. test would leave them unflagged and still missing.
foreach x in female racecat parent pre_poor pre_attend ///
	pre_sped pre_math pre_reading pre_rep_grade pre_expelled ///
	pre_math_reading pre_math2 pre_math3 pre_reading2 pre_reading3 ///
	pre_sch_urban pre_charter pre_sch_white pre_sch_black ///
	pre_sch_hisp pre_sch_frpl pre_n_cbg pre_nbhd_medinc ///
	pre_nbhd_emp pre_nbhd_bapl pre_nbhd_white pre_nbhd_black ///
	pre_nbhd_hisp pre_homeless {
	gen miss_`x'=missing(`x')
	replace `x'=0 if miss_`x'==1
}

**Time-varying outcome variables
*********Many outcomes arrive ready-made from other sources. Grade repetition,
*********test scores, mobility, and maltreatment are constructed in this file.
sort ric inv_caseid year

************(a) repeated grade
**Each row is compared with the row directly above it. The comparison only
**counts when that row belongs to the same ric and inv_caseid and the current
**grade_fnl lies in 0-12. rep_grade is 1 when grade_fnl is unchanged, 0 when it
**differs, and missing otherwise.
local same_case ric==ric[_n-1] & inv_caseid==inv_caseid[_n-1]
gen rep_grade=(grade_fnl==grade_fnl[_n-1]) if `same_case' & inrange(grade_fnl,0,12)

************(b) test scores
**For each target variable, pick up the grade-specific post_ variable that
**matches the grade the child is in that year (grades 3 through 8 only).
foreach s in math reading took_math took_reading {
	gen `s'=.
	forvalues g=3/8 {
		replace `s'=post_`s'_g`g' if grade_fnl==`g'
	}
}

***********(c) mobility- the mobility indicators are built in post_variables.do
**Rows found only in the mobility file are discarded; panel rows with no match
**get zeros for both indicators.
merge 1:1 ric inv_caseid year using "$cleandata/student_year_mobility.dta"
keep if _merge!=2
foreach v in diff_school diff_cb {
	replace `v'=0 if _merge==1
}
drop _merge

***********(d) maltreatment
**Same pattern as above, keyed on vicid: unmatched panel rows are coded 0 for
**inv and inv_sub. The merge result is tabulated before using-only rows go.
merge 1:1 vicid inv_caseid year using "$cleandata/vic_year_inv.dta"
tab _merge
keep if _merge!=2
foreach v in inv inv_sub {
	replace `v'=0 if _merge==1
}
drop _merge

***********(e) juvenile justice
**Both helper files are built inside a single preserve/restore so the panel
**in memory is set aside once and comes back untouched.
preserve

**Step 1: linkage file with one row per match_group
import delimit using $jj/20190313-jj-valid-rics.csv, clear varn(1)
gduplicates drop match_group, force	//there are 2 match groups corresponding to more than 1 ric//
tempfile jj_link
save `jj_link'

**Step 2: filing dates, kept only when the match_group is in the linkage file
gzuse $jj/jj_filing_dates.dta.gz, clear
keep match_group case_date
gduplicates drop
merge m:1 match_group using `jj_link'
keep if _merge==3
drop _merge match_group

**Step 3: collapse to student*SY using a crude 9/1 cutoff for now: filings in
**January-August count toward that calendar year, September-December toward
**the next one.
gen year=cond(month(case_date)<=8, year(case_date), year(case_date)+1)
keep ric year
gduplicates drop
tempfile jj_sy
save `jj_sy'

restore

**Attach the filing indicator to the panel: jj is 1 when the ric*year pair
**appears in the filing file and 0 otherwise.
merge m:1 ric year using `jj_sy'
keep if _merge!=2
cap drop jj
gen jj=(_merge==3)
drop _merge

**Blank out jj where it cannot be observed
replace jj=. if year>2015	//JJ data ends in 2015 for now//
replace jj=. if year-cps_sy+age_inv>16	//aged out of JJ system//
replace jj=. if inlist(worker_county, "Ingham County", "Kent County", ///
	"Keweenaw County", "Berrien County", "Ottawa County", "Delta County", ///
	"Kalamazoo County", "Washtenaw County") //counties without JJ coverage//

**Only keep post years
**(rows whose year is strictly greater than cps_sy; earlier rows were needed
**above only so that the first post year has a prior row for rep_grade)
keep if year>cps_sy

**Clean category variables (there are very minor discrepancies from the raw data)
**The three replaces below are order-dependent:
**  1. every row with fc==1 is put in category 1
**  2. rows still in category 1 with prep==1 but fc==0 move to category 2
**  3. rows in categories 1-3 with prep==0 move to category 4
**NOTE(review): step 3 also catches rows set to category 1 in step 1 when
**their prep is 0, so an fc==1 & prep==0 row ends in category 4. Confirm this
**is intended.
replace categ=1 if fc==1
replace categ=2 if categ==1 & prep==1 & fc==0
replace categ=4 if prep==0 & inrange(categ,1,3)
**required_services is 1 for categories 1 and 2 and 0 for everything else
gen required_services=inrange(categ,1,2)
 
**Build two summary indices of the outcomes: index averages whatever
**standardized components are observed in a row; index_imputed first fills
**missing components with the mean of the row's own fc group, as in Kling (2007).
local outcomes inv inv_sub attend rep_grade math reading jj
local adverse  inv inv_sub rep_grade jj

**Standardize each outcome against the fc==0 (control) distribution and
**collect the names of the standardized variables as they are created.
local zvars
foreach y of local outcomes {
	summarize `y' if fc==0
	gen `y'_std=(`y'-r(mean))/r(sd)
	local zvars `zvars' `y'_std
}

**Flip the sign of the adverse outcomes so that positive is better throughout
foreach y of local adverse {
	replace `y'_std=-`y'_std
}
egen index=rowmean(`zvars')

**Fill missing standardized values with the mean of the same fc group, then
**average again over the now-complete components.
foreach y of local outcomes {
	foreach g of numlist 0 1 {
		summarize `y'_std if fc==`g'
		replace `y'_std=r(mean) if missing(`y'_std) & fc==`g'
	}
}
egen index_imputed=rowmean(`zvars')

**Math and reading percentiles: each score is passed through the standard
**normal CDF, so the result lies between 0 and 1.
**NOTE(review): this presumes math and reading are already z-scores - confirm.
gen math_pctile=normal(math)
gen reading_pctile=normal(reading)

**Drop students in degenerate rotation groups
**Working on a one-row-per-(ric, inv_caseid) copy of the data, discard every
**rotation value that appears on a single row only, and store the surviving
**pairs. The panel is then cut down to those pairs.
preserve
gduplicates drop ric inv_caseid, force
bysort rotation: drop if _N==1
keep ric inv_caseid
tempfile retained_cases
save `retained_cases'
restore
merge m:1 ric inv_caseid using `retained_cases', keep(match) nogenerate

**Comparison-group flag: 1 when fc equals 0, and 0 in every other case
gen nofc=(fc==0)

**Write the finished panel to disk for the analysis programs
save "$cleandata/sample_panel_for_analysis.dta", replace
